HW 03

Author

Cedric Destin

options(warn=-1)
if (!require("pacman")) 
  install.packages("pacman")

# use this line for installing/loading
pacman::p_load(tidyverse,
               glue,
               scales,
               openintro,
               gridExtra,
               ggrepel,
               ggmap,
               ggridges,
               dsbox,
               devtools,
               fs,
               janitor,
               here,
               dplyr,
               palmerpenguins,
               stringr,
               ggplot2,
               plotly,
               Hmisc,
               ggExtra
               ) 

1 - Du Bois challenge.

income <- read_csv(here("data", "income.csv"), 
                    show_col_types = FALSE)
income <- income |> 
  mutate(
      Rent_pct = Rent / 100,
      Food_pct = Food / 100,
      Clothes_pct = Clothes / 100,
      Tax_pct = Tax / 100,
      Other_pct = Other / 100
    )
income <- income %>%
  mutate(Class = paste0(as.character(Class), "   $", as.character(Average_Income)))
income$Class <- factor(income$Class, levels = c("$1000 AND OVER   $1125", "$750-1000   $880", "$500-750   $547", "$400-500   $433.82", "$300-400   $335.66", "$200-300   $249.45", "$100-200   $139.1"))
glimpse(income)
Rows: 7
Columns: 12
$ Class          <fct> $100-200   $139.1, $200-300   $249.45, $300-400   $335.…
$ Average_Income <dbl> 139.10, 249.45, 335.66, 433.82, 547.00, 880.00, 1125.00
$ Rent           <dbl> 19, 22, 23, 18, 13, 0, 0
$ Food           <dbl> 43, 47, 43, 37, 31, 37, 29
$ Clothes        <dbl> 28, 23, 18, 15, 17, 19, 16
$ Tax            <dbl> 9.9, 4.0, 4.5, 5.5, 5.0, 8.0, 4.5
$ Other          <dbl> 0.1, 4.0, 11.5, 24.5, 34.0, 36.0, 50.5
$ Rent_pct       <dbl> 0.19, 0.22, 0.23, 0.18, 0.13, 0.00, 0.00
$ Food_pct       <dbl> 0.43, 0.47, 0.43, 0.37, 0.31, 0.37, 0.29
$ Clothes_pct    <dbl> 0.28, 0.23, 0.18, 0.15, 0.17, 0.19, 0.16
$ Tax_pct        <dbl> 0.099, 0.040, 0.045, 0.055, 0.050, 0.080, 0.045
$ Other_pct      <dbl> 0.001, 0.040, 0.115, 0.245, 0.340, 0.360, 0.505
# Your original code, modified to include text labels
fig <- plot_ly(income, x = ~Rent, y = ~Class, 
               type = 'bar', 
               orientation = 'h',
               name = 'Rent',
               # --- Add these lines ---
               text = ~Rent_pct,                 # Use the percentage column for the text data
               textposition = 'inside',
               texttemplate = '%{text:.0%}',      # Format the text as a percentage with 0 decimal places
               # ----------------------
               marker = list(color = '#121210'))

fig <- fig %>% add_trace(x = ~Food, name = 'Food',
                         # --- Add these lines ---
                         text = ~Food_pct,
                         textposition = 'inside',
                         texttemplate = '%{text:.0%}',
                         # ----------------------
                         marker = list(color = '#7D5A7F'))

fig <- fig %>% add_trace(x = ~Clothes, name = 'Clothes',
                         # --- Add these lines ---
                         text = ~Clothes_pct,
                         textposition = 'inside',
                         texttemplate = '%{text:.0%}',
                         # ----------------------
                         marker = list(color = '#D79684'))

fig <- fig %>% add_trace(x = ~Tax, name = 'Tax',
                         # --- Add these lines ---
                         text = ~Tax_pct,
                         textposition = 'inside',
                         texttemplate = '%{text:.1%}',
                         # ----------------------
                         marker = list(color = '#003e80'))

fig <- fig %>% add_trace(x = ~Other, name = 'Other',
                         # --- Add these lines ---
                         text = ~Other_pct,
                         textposition = 'inside',
                         texttemplate = '%{text:.1%}',
                         # ----------------------
                         marker = list(color = '#e6f2ff'))

# Apply the layout (no changes needed here)
fig <- fig %>% layout(
  barmode = 'stack',
  title = "INCOME AND EXPENITURE OF 150 NEGRO FAMILIES IN ATLANTA, GA. U.S.A.",
  titlefont = list(size = 15, color = "#000000"),
  xaxis = list(
    title = "FOR FUTHER STATISTICS RAISE THIS FRAME",
    showticklabels = FALSE
  ),
    annotations = list(
      list(
        x = -0.28,
        y = 1.025,
        text = "Class     Actual Average",
        showarrow = FALSE,
        xref = "paper",
        yref = "paper"
      )
    ),
  yaxis = list(title = ""),
  showlegend = FALSE,
  plot_bgcolor = "#CAB2A0",   # inside plot area
  paper_bgcolor = "#CAB2A0"   # outside plot area
)

# Display the figure
fig

2 - COVID survey - interpret

The plot illustrates opinions made by several different groups regarding the COVID vaccine below are three observations I made: 1. Nurses seem to strongly recommend the vaccine, with a very small error bar, illustrating that most nurses are of the same mindset. In fact, most groups seem to recommend the vaccines except for a few groups, “Had COVID vaccine: No” and “Gender: Prefer not to say”. 2. Most groups were very confident in the scientific vetting the process for the new COVID vaccines, again, only a few groups showed disagreement to it, being the same groups disagreeing with the previous statement: “Had COVID vaccine: No” and “Gender: Prefer not to say”. 3. This time we could observe two very interesting groups, the “Had COVID vaccine” and the “Had flu vaccine this year” a. It is understandable that the people who had the COVID vaccine responded positively about the vaccine, like the people that had the flu vaccine this year, they mostly had positive feedback. b. On the other hand, the groups that did not have COVID vaccines had responses that were split straight in the middle, but with large amount of uncertainty. This illustrates that the consensus regarding the vaccine within that group is very broad, and that they may not have had the vaccines for a variety of reasons.

3 - COVID survey - reconstruct

covid_survey <- read_csv(here("data", "covid-survey.csv"),
                         show_col_types = FALSE, skip = 1)
glimpse(covid_survey)
Rows: 1,121
Columns: 14
$ response_id             <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,…
$ exp_profession          <dbl> 1, 1, NA, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ exp_flu_vax             <dbl> 1, 1, NA, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ exp_gender              <dbl> 0, 1, NA, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, …
$ exp_race                <dbl> 2, 2, NA, 5, 5, 5, 5, 5, 5, 2, 5, 5, 2, 5, 5, …
$ exp_ethnicity           <dbl> 2, 2, NA, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
$ exp_age_bin             <dbl> 25, 20, NA, 25, 25, 25, 25, 25, 20, 20, 20, 25…
$ exp_already_vax         <dbl> 1, 1, NA, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ resp_safety             <dbl> 5, 5, NA, 5, 5, 5, 5, 4, 4, 5, 5, 5, 5, 5, 5, …
$ resp_confidence_science <dbl> 2, 1, NA, 1, 1, 1, 1, 2, 2, 1, 1, 2, 1, 1, 1, …
$ resp_concern_safety     <dbl> 2, 1, NA, 1, 1, 1, 1, 4, 4, 1, 2, 2, 3, 1, 3, …
$ resp_feel_safe_at_work  <dbl> 1, 1, NA, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ resp_will_recommend     <dbl> 1, 1, NA, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ resp_trust_info         <dbl> 1, 1, NA, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, …
print(
  dim(covid_survey)
)
[1] 1121   14
covid_survey <- covid_survey %>%
  filter(if_all(-response_id, ~ !is.na(.)))

print(
  dim(covid_survey)
)
[1] 926  14
covid_survey <- covid_survey %>%
  mutate(
    exp_already_vax = ifelse(exp_already_vax == 0, "No", "Yes"),
    exp_flu_vax = ifelse(exp_flu_vax == 0, "No", "Yes"),
    exp_profession = ifelse(exp_profession == 0, "Medical", "Nursing"),
    exp_gender = ifelse(exp_gender == 0, "Male", 
    ifelse(exp_gender == 1, "Female", 
            ifelse(exp_gender == 3, "Non-binary third gender", "Prefer not to say"))),
    exp_race = ifelse(exp_race == 1, "American Indian / Alaskan Native", 
                      ifelse(exp_race == 2, "Asian", 
                      ifelse(exp_race == 3, "Black or African American", 
                      ifelse(exp_race == 4, "Native Hawaiian / Other Pacific Islander", "White")))),
    exp_ethnicity = ifelse(exp_ethnicity == 1, "Hispanic / Latino", "Non-Hispanic/Non-Latino"),
    exp_age_bin = case_when(
      exp_age_bin == 0 ~ "<20",
      exp_age_bin == 20 ~ "21-25",
      exp_age_bin == 25 ~ "26-30",
      exp_age_bin == 30 ~ ">30"
    )
  )

print(
  dim(covid_survey)
)
[1] 926  14
covid_survey_longer <- covid_survey |>
  pivot_longer(
    cols = starts_with("exp_"),
    names_to = "explanatory",
    values_to = "explanatory_value"
  ) |>
  filter(!is.na(explanatory_value)) |>
  pivot_longer(
    cols = starts_with("resp_"),
    names_to = "response",
    values_to = "response_value"
  )

print(covid_survey_longer)
# A tibble: 38,892 × 5
   response_id explanatory    explanatory_value response          response_value
         <dbl> <chr>          <chr>             <chr>                      <dbl>
 1           1 exp_profession Nursing           resp_safety                    5
 2           1 exp_profession Nursing           resp_confidence_…              2
 3           1 exp_profession Nursing           resp_concern_saf…              2
 4           1 exp_profession Nursing           resp_feel_safe_a…              1
 5           1 exp_profession Nursing           resp_will_recomm…              1
 6           1 exp_profession Nursing           resp_trust_info                1
 7           1 exp_flu_vax    Yes               resp_safety                    5
 8           1 exp_flu_vax    Yes               resp_confidence_…              2
 9           1 exp_flu_vax    Yes               resp_concern_saf…              2
10           1 exp_flu_vax    Yes               resp_feel_safe_a…              1
# ℹ 38,882 more rows
covid_survey_summary_stats_by_group <- covid_survey_longer %>%
  group_by(explanatory, explanatory_value, response) %>%
  summarise(
    mean = mean(as.numeric(response_value), na.rm = TRUE),
    low = quantile(as.numeric(response_value), probs = 0.10, na.rm = TRUE),
    high = quantile(as.numeric(response_value), probs = 0.90, na.rm = TRUE)
  )

print(covid_survey_summary_stats_by_group, n = Inf)
# A tibble: 126 × 6
# Groups:   explanatory, explanatory_value [21]
    explanatory     explanatory_value                 response  mean   low  high
    <chr>           <chr>                             <chr>    <dbl> <dbl> <dbl>
  1 exp_age_bin     21-25                             resp_co…  3.32  2     5   
  2 exp_age_bin     21-25                             resp_co…  1.30  1     2   
  3 exp_age_bin     21-25                             resp_fe…  1.18  1     2   
  4 exp_age_bin     21-25                             resp_sa…  1.97  1     5   
  5 exp_age_bin     21-25                             resp_tr…  1.29  1     2   
  6 exp_age_bin     21-25                             resp_wi…  1.09  1     1   
  7 exp_age_bin     26-30                             resp_co…  3.32  1     5   
  8 exp_age_bin     26-30                             resp_co…  1.39  1     2   
  9 exp_age_bin     26-30                             resp_fe…  1.27  1     2   
 10 exp_age_bin     26-30                             resp_sa…  2.17  1     5   
 11 exp_age_bin     26-30                             resp_tr…  1.35  1     2   
 12 exp_age_bin     26-30                             resp_wi…  1.18  1     1   
 13 exp_age_bin     <20                               resp_co…  3.31  2     4.5 
 14 exp_age_bin     <20                               resp_co…  1.69  1     2.5 
 15 exp_age_bin     <20                               resp_fe…  1.75  1     4   
 16 exp_age_bin     <20                               resp_sa…  1.44  1     2   
 17 exp_age_bin     <20                               resp_tr…  1.38  1     2   
 18 exp_age_bin     <20                               resp_wi…  1.38  1     2   
 19 exp_age_bin     >30                               resp_co…  3.02  1     5   
 20 exp_age_bin     >30                               resp_co…  1.69  1     3   
 21 exp_age_bin     >30                               resp_fe…  1.75  1     4   
 22 exp_age_bin     >30                               resp_sa…  1.83  1     4   
 23 exp_age_bin     >30                               resp_tr…  1.63  1     3   
 24 exp_age_bin     >30                               resp_wi…  1.46  1     3   
 25 exp_already_vax No                                resp_co…  2.17  1     4   
 26 exp_already_vax No                                resp_co…  3.27  1.10  5   
 27 exp_already_vax No                                resp_fe…  3.83  2     5   
 28 exp_already_vax No                                resp_sa…  2.85  1     4.9 
 29 exp_already_vax No                                resp_tr…  3.12  1     5   
 30 exp_already_vax No                                resp_wi…  3.08  1     5   
 31 exp_already_vax Yes                               resp_co…  3.33  1     5   
 32 exp_already_vax Yes                               resp_co…  1.32  1     2   
 33 exp_already_vax Yes                               resp_fe…  1.19  1     2   
 34 exp_already_vax Yes                               resp_sa…  2.00  1     5   
 35 exp_already_vax Yes                               resp_tr…  1.28  1     2   
 36 exp_already_vax Yes                               resp_wi…  1.09  1     1   
 37 exp_ethnicity   Hispanic / Latino                 resp_co…  3.07  2     5   
 38 exp_ethnicity   Hispanic / Latino                 resp_co…  1.49  1     2   
 39 exp_ethnicity   Hispanic / Latino                 resp_fe…  1.32  1     2   
 40 exp_ethnicity   Hispanic / Latino                 resp_sa…  2.05  1     5   
 41 exp_ethnicity   Hispanic / Latino                 resp_tr…  1.37  1     2   
 42 exp_ethnicity   Hispanic / Latino                 resp_wi…  1.12  1     1.40
 43 exp_ethnicity   Non-Hispanic/Non-Latino           resp_co…  3.28  1     5   
 44 exp_ethnicity   Non-Hispanic/Non-Latino           resp_co…  1.42  1     2   
 45 exp_ethnicity   Non-Hispanic/Non-Latino           resp_fe…  1.34  1     2   
 46 exp_ethnicity   Non-Hispanic/Non-Latino           resp_sa…  2.04  1     5   
 47 exp_ethnicity   Non-Hispanic/Non-Latino           resp_tr…  1.39  1     2   
 48 exp_ethnicity   Non-Hispanic/Non-Latino           resp_wi…  1.21  1     2   
 49 exp_flu_vax     No                                resp_co…  3.13  1     5   
 50 exp_flu_vax     No                                resp_co…  1.91  1     4   
 51 exp_flu_vax     No                                resp_fe…  1.98  1     5   
 52 exp_flu_vax     No                                resp_sa…  2     1     5   
 53 exp_flu_vax     No                                resp_tr…  1.91  1     4   
 54 exp_flu_vax     No                                resp_wi…  1.74  1     3.4 
 55 exp_flu_vax     Yes                               resp_co…  3.27  1     5   
 56 exp_flu_vax     Yes                               resp_co…  1.40  1     2   
 57 exp_flu_vax     Yes                               resp_fe…  1.30  1     2   
 58 exp_flu_vax     Yes                               resp_sa…  2.05  1     5   
 59 exp_flu_vax     Yes                               resp_tr…  1.36  1     2   
 60 exp_flu_vax     Yes                               resp_wi…  1.18  1     2   
 61 exp_gender      Female                            resp_co…  3.34  1     5   
 62 exp_gender      Female                            resp_co…  1.31  1     2   
 63 exp_gender      Female                            resp_fe…  1.35  1     2   
 64 exp_gender      Female                            resp_sa…  2.09  1     5   
 65 exp_gender      Female                            resp_tr…  1.33  1     2   
 66 exp_gender      Female                            resp_wi…  1.20  1     2   
 67 exp_gender      Male                              resp_co…  3.25  1.80  5   
 68 exp_gender      Male                              resp_co…  1.45  1     2   
 69 exp_gender      Male                              resp_fe…  1.32  1     2   
 70 exp_gender      Male                              resp_sa…  2.01  1     5   
 71 exp_gender      Male                              resp_tr…  1.39  1     2   
 72 exp_gender      Male                              resp_wi…  1.20  1     2   
 73 exp_gender      Non-binary third gender           resp_co…  2.7   1     4.1 
 74 exp_gender      Non-binary third gender           resp_co…  1.6   1     2.3 
 75 exp_gender      Non-binary third gender           resp_fe…  1.4   1     2.2 
 76 exp_gender      Non-binary third gender           resp_sa…  3     1     5   
 77 exp_gender      Non-binary third gender           resp_tr…  1.5   1     2.2 
 78 exp_gender      Non-binary third gender           resp_wi…  1.2   1     1.2 
 79 exp_gender      Prefer not to say                 resp_co…  2.67  1.5   3.5 
 80 exp_gender      Prefer not to say                 resp_co…  3     1     5   
 81 exp_gender      Prefer not to say                 resp_fe…  3.17  1     5   
 82 exp_gender      Prefer not to say                 resp_sa…  2.17  1     3   
 83 exp_gender      Prefer not to say                 resp_tr…  2.83  1     5   
 84 exp_gender      Prefer not to say                 resp_wi…  2.5   1.5   3.5 
 85 exp_profession  Medical                           resp_co…  3.14  1     5   
 86 exp_profession  Medical                           resp_co…  1.62  1     3   
 87 exp_profession  Medical                           resp_fe…  1.63  1     3   
 88 exp_profession  Medical                           resp_sa…  1.54  1     3   
 89 exp_profession  Medical                           resp_tr…  1.54  1     3   
 90 exp_profession  Medical                           resp_wi…  1.41  1     2   
 91 exp_profession  Nursing                           resp_co…  3.31  1     5   
 92 exp_profession  Nursing                           resp_co…  1.36  1     2   
 93 exp_profession  Nursing                           resp_fe…  1.24  1     2   
 94 exp_profession  Nursing                           resp_sa…  2.22  1     5   
 95 exp_profession  Nursing                           resp_tr…  1.33  1     2   
 96 exp_profession  Nursing                           resp_wi…  1.13  1     1   
 97 exp_race        American Indian / Alaskan Native  resp_co…  2.73  1     4   
 98 exp_race        American Indian / Alaskan Native  resp_co…  2     1     5   
 99 exp_race        American Indian / Alaskan Native  resp_fe…  2.09  1     5   
100 exp_race        American Indian / Alaskan Native  resp_sa…  2     1     3   
101 exp_race        American Indian / Alaskan Native  resp_tr…  1.91  1     5   
102 exp_race        American Indian / Alaskan Native  resp_wi…  1.82  1     3   
103 exp_race        Asian                             resp_co…  3.16  2     5   
104 exp_race        Asian                             resp_co…  1.32  1     2   
105 exp_race        Asian                             resp_fe…  1.14  1     2   
106 exp_race        Asian                             resp_sa…  2.1   1     5   
107 exp_race        Asian                             resp_tr…  1.27  1     2   
108 exp_race        Asian                             resp_wi…  1.06  1     1   
109 exp_race        Black or African American         resp_co…  2.89  1     5   
110 exp_race        Black or African American         resp_co…  1.53  1     2.6 
111 exp_race        Black or African American         resp_fe…  1.56  1     3   
112 exp_race        Black or African American         resp_sa…  1.89  1     4.6 
113 exp_race        Black or African American         resp_tr…  1.44  1     2   
114 exp_race        Black or African American         resp_wi…  1.44  1     2.6 
115 exp_race        Native Hawaiian / Other Pacific … resp_co…  3.67  2.4   4.8 
116 exp_race        Native Hawaiian / Other Pacific … resp_co…  1.67  1.2   2   
117 exp_race        Native Hawaiian / Other Pacific … resp_fe…  1.33  1     1.8 
118 exp_race        Native Hawaiian / Other Pacific … resp_sa…  1.67  1.2   2   
119 exp_race        Native Hawaiian / Other Pacific … resp_tr…  1.67  1.2   2   
120 exp_race        Native Hawaiian / Other Pacific … resp_wi…  1     1     1   
121 exp_race        White                             resp_co…  3.32  1     5   
122 exp_race        White                             resp_co…  1.44  1     2   
123 exp_race        White                             resp_fe…  1.37  1     2   
124 exp_race        White                             resp_sa…  2.04  1     5   
125 exp_race        White                             resp_tr…  1.40  1     2   
126 exp_race        White                             resp_wi…  1.22  1     2   
covid_survey_summary_stats_all <- covid_survey_longer %>%
  group_by(response) %>%
  summarise(
    mean = mean(as.numeric(response_value), na.rm = TRUE),
    low = quantile(as.numeric(response_value), probs = 0.10, na.rm = TRUE),
    high = quantile(as.numeric(response_value), probs = 0.90, na.rm = TRUE),
    explanatory = "All",
    explanatory_value = ""
  )

print(covid_survey_summary_stats_all, n = Inf)
# A tibble: 6 × 6
  response                 mean   low  high explanatory explanatory_value
  <chr>                   <dbl> <dbl> <dbl> <chr>       <chr>            
1 resp_concern_safety      3.26     1     5 All         ""               
2 resp_confidence_science  1.43     1     2 All         ""               
3 resp_feel_safe_at_work   1.34     1     2 All         ""               
4 resp_safety              2.04     1     5 All         ""               
5 resp_trust_info          1.38     1     2 All         ""               
6 resp_will_recommend      1.21     1     2 All         ""               
covid_survey_summary_stats <- bind_rows(
  covid_survey_summary_stats_by_group,
  covid_survey_summary_stats_all
)

print(covid_survey_summary_stats)
# A tibble: 132 × 6
# Groups:   explanatory, explanatory_value [22]
   explanatory explanatory_value response                 mean   low  high
   <chr>       <chr>             <chr>                   <dbl> <dbl> <dbl>
 1 exp_age_bin 21-25             resp_concern_safety      3.32     2     5
 2 exp_age_bin 21-25             resp_confidence_science  1.30     1     2
 3 exp_age_bin 21-25             resp_feel_safe_at_work   1.18     1     2
 4 exp_age_bin 21-25             resp_safety              1.97     1     5
 5 exp_age_bin 21-25             resp_trust_info          1.29     1     2
 6 exp_age_bin 21-25             resp_will_recommend      1.09     1     1
 7 exp_age_bin 26-30             resp_concern_safety      3.32     1     5
 8 exp_age_bin 26-30             resp_confidence_science  1.39     1     2
 9 exp_age_bin 26-30             resp_feel_safe_at_work   1.27     1     2
10 exp_age_bin 26-30             resp_safety              2.17     1     5
# ℹ 122 more rows
covid_survey_summary_stats  <- covid_survey_summary_stats %>%
  mutate(
    explanatory = case_when(
      explanatory == "exp_age_bin" ~ "Age",
      explanatory == "exp_already_vax" ~ "Had COVID vaccine",
      explanatory == "exp_flu_vax" ~ "Had flu vaccine this year",
      explanatory == "exp_profession" ~ "Profession",
      explanatory == "exp_gender" ~ "Gender",
      explanatory == "exp_race" ~ "Race",
      explanatory == "exp_ethnicity" ~ "Ethnicity",
      explanatory == "All" ~ "All"
      ),
      response = case_when(
      response == "resp_safety" ~ "Based on my understanding, I believe the vaccine is safe",
      response == "resp_confidence_science" ~ "I am confident in the scientific vetting process for the new COVID vaccines",
      response == "resp_feel_safe_at_work" ~ "Getting the vaccine will make me feel safer at work",
      response == "resp_will_recommend" ~ "I will recommend the vaccine to family, friends, and community members",
      response == "resp_trust_info" ~ "I trust the information that I have received about the vaccines",
      response == "resp_concern_safety" ~ "I am concerned about the safety and side effects of the vaccine"
    )
  )
print(covid_survey_summary_stats)
# A tibble: 132 × 6
# Groups:   explanatory, explanatory_value [22]
   explanatory explanatory_value response                       mean   low  high
   <chr>       <chr>             <chr>                         <dbl> <dbl> <dbl>
 1 Age         21-25             I am concerned about the saf…  3.32     2     5
 2 Age         21-25             I am confident in the scient…  1.30     1     2
 3 Age         21-25             Getting the vaccine will mak…  1.18     1     2
 4 Age         21-25             Based on my understanding, I…  1.97     1     5
 5 Age         21-25             I trust the information that…  1.29     1     2
 6 Age         21-25             I will recommend the vaccine…  1.09     1     1
 7 Age         26-30             I am concerned about the saf…  3.32     1     5
 8 Age         26-30             I am confident in the scient…  1.39     1     2
 9 Age         26-30             Getting the vaccine will mak…  1.27     1     2
10 Age         26-30             Based on my understanding, I…  2.17     1     5
# ℹ 122 more rows
custom_order <- c("All", "Age", "Gender", "Race", "Ethnicity", "Profession", "Had COVID vaccine", "Had flu vaccine this year")
covid_survey_summary_stats$explanatory <- factor(covid_survey_summary_stats$explanatory, levels = custom_order)
covid_survey_summary_stats_sorted <- covid_survey_summary_stats[order(covid_survey_summary_stats$explanatory), ]
print(covid_survey_summary_stats_sorted)
# A tibble: 132 × 6
# Groups:   explanatory, explanatory_value [22]
   explanatory explanatory_value response                       mean   low  high
   <fct>       <chr>             <chr>                         <dbl> <dbl> <dbl>
 1 All         ""                I am concerned about the saf…  3.26     1     5
 2 All         ""                I am confident in the scient…  1.43     1     2
 3 All         ""                Getting the vaccine will mak…  1.34     1     2
 4 All         ""                Based on my understanding, I…  2.04     1     5
 5 All         ""                I trust the information that…  1.38     1     2
 6 All         ""                I will recommend the vaccine…  1.21     1     2
 7 Age         "21-25"           I am concerned about the saf…  3.32     2     5
 8 Age         "21-25"           I am confident in the scient…  1.30     1     2
 9 Age         "21-25"           Getting the vaccine will mak…  1.18     1     2
10 Age         "21-25"           Based on my understanding, I…  1.97     1     5
# ℹ 122 more rows
#| label: change_order_level_2
custom_order <- c("", ">30", "26-30", "21-25", "<20", "Prefer not to say", "Non-binary third gender", "Male", "Female", 
"White", "Native Hawaiian / Other Pacific Islander", "Black or African American", "Asian",  "American Indian / Alaskan Native", 
"Non-Hispanic/Non-Latino", "Hispanic / Latino", "Nursing", "Medical", "Yes", "No")
covid_survey_summary_stats_sorted$explanatory_value <- factor(covid_survey_summary_stats_sorted$explanatory_value, levels = custom_order)
covid_survey_summary_stats_sorted <- covid_survey_summary_stats_sorted[order(covid_survey_summary_stats_sorted$explanatory_value), ]
print(covid_survey_summary_stats_sorted)
ggplot(covid_survey_summary_stats_sorted, aes(x = mean, y = explanatory_value)) +
  geom_point(position = position_dodge(width = 0.5)) +
  geom_errorbarh(aes(xmin = low, xmax = high), height = 0.2, position = position_dodge(width = 0.5)) +
  facet_grid(cols = vars(response), 
              rows = vars(explanatory),
              labeller = labeller(response = label_wrap_gen(15),
                                  explanatory = label_wrap_gen(15)),
              space = "free_y",
              scales = "free_y") +
  labs(x = "Mean likert score \n (Error bars range from 10th to 90th percentile)", y = "") + 
  theme_minimal() +
  theme(
    strip.background = element_rect(fill = "gray90", color = "lightgray"),
    strip.text.x = element_text(angle = 0),
    strip.text.y = element_text(angle = 0)
  ) + 
  removeGrid()

4 - COVID survey - re-reconstruct

covid_survey_summary_stats_all <- covid_survey_longer %>%
  group_by(response) %>%
  summarise(
    mean = mean(as.numeric(response_value), na.rm = TRUE),
    low = quantile(as.numeric(response_value), probs = 0.25, na.rm = TRUE),
    high = quantile(as.numeric(response_value), probs = 0.75, na.rm = TRUE),
    explanatory = "All",
    explanatory_value = ""
  )

print(covid_survey_summary_stats_all, n = Inf)
# A tibble: 6 × 6
  response                 mean   low  high explanatory explanatory_value
  <chr>                   <dbl> <dbl> <dbl> <chr>       <chr>            
1 resp_concern_safety      3.26     2     4 All         ""               
2 resp_confidence_science  1.43     1     2 All         ""               
3 resp_feel_safe_at_work   1.34     1     1 All         ""               
4 resp_safety              2.04     1     3 All         ""               
5 resp_trust_info          1.38     1     2 All         ""               
6 resp_will_recommend      1.21     1     1 All         ""               
covid_survey_summary_stats <- bind_rows(
  covid_survey_summary_stats_by_group,
  covid_survey_summary_stats_all
)

print(covid_survey_summary_stats)
# A tibble: 132 × 6
# Groups:   explanatory, explanatory_value [22]
   explanatory explanatory_value response                 mean   low  high
   <chr>       <chr>             <chr>                   <dbl> <dbl> <dbl>
 1 exp_age_bin 21-25             resp_concern_safety      3.32     2     5
 2 exp_age_bin 21-25             resp_confidence_science  1.30     1     2
 3 exp_age_bin 21-25             resp_feel_safe_at_work   1.18     1     2
 4 exp_age_bin 21-25             resp_safety              1.97     1     5
 5 exp_age_bin 21-25             resp_trust_info          1.29     1     2
 6 exp_age_bin 21-25             resp_will_recommend      1.09     1     1
 7 exp_age_bin 26-30             resp_concern_safety      3.32     1     5
 8 exp_age_bin 26-30             resp_confidence_science  1.39     1     2
 9 exp_age_bin 26-30             resp_feel_safe_at_work   1.27     1     2
10 exp_age_bin 26-30             resp_safety              2.17     1     5
# ℹ 122 more rows
covid_survey_summary_stats  <- covid_survey_summary_stats %>%
  mutate(
    explanatory = case_when(
      explanatory == "exp_age_bin" ~ "Age",
      explanatory == "exp_already_vax" ~ "Had COVID vaccine",
      explanatory == "exp_flu_vax" ~ "Had flu vaccine this year",
      explanatory == "exp_profession" ~ "Profession",
      explanatory == "exp_gender" ~ "Gender",
      explanatory == "exp_race" ~ "Race",
      explanatory == "exp_ethnicity" ~ "Ethnicity",
      explanatory == "All" ~ "All"
      ),
      response = case_when(
      response == "resp_safety" ~ "Based on my understanding, I believe the vaccine is safe",
      response == "resp_confidence_science" ~ "I am confident in the scientific vetting process for the new COVID vaccines",
      response == "resp_feel_safe_at_work" ~ "Getting the vaccine will make me feel safer at work",
      response == "resp_will_recommend" ~ "I will recommend the vaccine to family, friends, and community members",
      response == "resp_trust_info" ~ "I trust the information that I have received about the vaccines",
      response == "resp_concern_safety" ~ "I am concerned about the safety and side effects of the vaccine"
    )
  )
print(covid_survey_summary_stats)
# A tibble: 132 × 6
# Groups:   explanatory, explanatory_value [22]
   explanatory explanatory_value response                       mean   low  high
   <chr>       <chr>             <chr>                         <dbl> <dbl> <dbl>
 1 Age         21-25             I am concerned about the saf…  3.32     2     5
 2 Age         21-25             I am confident in the scient…  1.30     1     2
 3 Age         21-25             Getting the vaccine will mak…  1.18     1     2
 4 Age         21-25             Based on my understanding, I…  1.97     1     5
 5 Age         21-25             I trust the information that…  1.29     1     2
 6 Age         21-25             I will recommend the vaccine…  1.09     1     1
 7 Age         26-30             I am concerned about the saf…  3.32     1     5
 8 Age         26-30             I am confident in the scient…  1.39     1     2
 9 Age         26-30             Getting the vaccine will mak…  1.27     1     2
10 Age         26-30             Based on my understanding, I…  2.17     1     5
# ℹ 122 more rows
custom_order <- c("All", "Age", "Gender", "Race", "Ethnicity", "Profession", "Had COVID vaccine", "Had flu vaccine this year")
covid_survey_summary_stats$explanatory <- factor(covid_survey_summary_stats$explanatory, levels = custom_order)
covid_survey_summary_stats_sorted_2 <- covid_survey_summary_stats[order(covid_survey_summary_stats$explanatory), ]
print(covid_survey_summary_stats_sorted_2)
# A tibble: 132 × 6
# Groups:   explanatory, explanatory_value [22]
   explanatory explanatory_value response                       mean   low  high
   <fct>       <chr>             <chr>                         <dbl> <dbl> <dbl>
 1 All         ""                I am concerned about the saf…  3.26     2     4
 2 All         ""                I am confident in the scient…  1.43     1     2
 3 All         ""                Getting the vaccine will mak…  1.34     1     1
 4 All         ""                Based on my understanding, I…  2.04     1     3
 5 All         ""                I trust the information that…  1.38     1     2
 6 All         ""                I will recommend the vaccine…  1.21     1     1
 7 Age         "21-25"           I am concerned about the saf…  3.32     2     5
 8 Age         "21-25"           I am confident in the scient…  1.30     1     2
 9 Age         "21-25"           Getting the vaccine will mak…  1.18     1     2
10 Age         "21-25"           Based on my understanding, I…  1.97     1     5
# ℹ 122 more rows
custom_order_2 <- c("", ">30", "26-30", "21-25", "<20", "Prefer not to say", "Non-binary third gender", "Male", "Female", 
"White", "Native Hawaiian / Other Pacific Islander", "Black or African American", "Asian",  "American Indian / Alaskan Native", 
"Non-Hispanic/Non-Latino", "Hispanic / Latino", "Nursing", "Medical", "Yes", "No")
covid_survey_summary_stats_sorted_2$explanatory_value <- factor(covid_survey_summary_stats_sorted_2$explanatory_value, levels = custom_order_2)
covid_survey_summary_stats_sorted_2 <- covid_survey_summary_stats_sorted_2[order(covid_survey_summary_stats_sorted_2$explanatory_value), ]
print(covid_survey_summary_stats_sorted_2)
# A tibble: 132 × 6
# Groups:   explanatory, explanatory_value [22]
   explanatory explanatory_value response                       mean   low  high
   <fct>       <fct>             <chr>                         <dbl> <dbl> <dbl>
 1 All         ""                I am concerned about the saf…  3.26     2     4
 2 All         ""                I am confident in the scient…  1.43     1     2
 3 All         ""                Getting the vaccine will mak…  1.34     1     1
 4 All         ""                Based on my understanding, I…  2.04     1     3
 5 All         ""                I trust the information that…  1.38     1     2
 6 All         ""                I will recommend the vaccine…  1.21     1     1
 7 Age         ">30"             I am concerned about the saf…  3.02     1     5
 8 Age         ">30"             I am confident in the scient…  1.69     1     3
 9 Age         ">30"             Getting the vaccine will mak…  1.75     1     4
10 Age         ">30"             Based on my understanding, I…  1.83     1     4
# ℹ 122 more rows
ggplot(covid_survey_summary_stats_sorted_2, aes(x = mean, y = explanatory_value)) +
  geom_point(position = position_dodge(width = 0.5)) +
  geom_errorbarh(aes(xmin = low, xmax = high), height = 0.2, position = position_dodge(width = 0.5)) +
  facet_grid(cols = vars(response), 
              rows = vars(explanatory),
              labeller = labeller(response = label_wrap_gen(15),
                                  explanatory = label_wrap_gen(15)),
              space = "free_y",
              scales = "free_y") +
  labs(x = "Mean likert score \n (Error bars range from 25th to 75th percentile)", y = "") + 
  theme_minimal() +
  theme(
    strip.background = element_rect(fill = "gray90", color = "lightgray"),
    strip.text.x = element_text(angle = 0),
    strip.text.y = element_text(angle = 0)
  ) + 
  removeGrid()

5 - COVID survey - another view

covid_survey_longer <- covid_survey_longer %>%
  mutate(
    response_value = as.numeric(response_value),
    explanatory = case_when(
      explanatory == "exp_age_bin" ~ "Age",
      explanatory == "exp_already_vax" ~ "Had COVID vaccine",
      explanatory == "exp_flu_vax" ~ "Had flu vaccine this year",
      explanatory == "exp_profession" ~ "Profession",
      explanatory == "exp_gender" ~ "Gender",
      explanatory == "exp_race" ~ "Race",
      explanatory == "exp_ethnicity" ~ "Ethnicity"
    ),
    response = case_when(
      response == "resp_safety" ~ "Based on my understanding, I believe the vaccine is safe",
      response == "resp_confidence_science" ~ "I am confident in the scientific vetting process for the new COVID vaccines",
      response == "resp_feel_safe_at_work" ~ "Getting the vaccine will make me feel safer at work",
      response == "resp_will_recommend" ~ "I will recommend the vaccine to family, friends, and community members",
      response == "resp_trust_info" ~ "I trust the information that I have received about the vaccines",
      response == "resp_concern_safety" ~ "I am concerned about the safety and side effects of the vaccine"
    )
  ) 
print(covid_survey_longer)
# A tibble: 38,892 × 5
   response_id explanatory             explanatory_value response response_value
         <dbl> <chr>                   <chr>             <chr>             <dbl>
 1           1 Profession              Nursing           Based o…              5
 2           1 Profession              Nursing           I am co…              2
 3           1 Profession              Nursing           I am co…              2
 4           1 Profession              Nursing           Getting…              1
 5           1 Profession              Nursing           I will …              1
 6           1 Profession              Nursing           I trust…              1
 7           1 Had flu vaccine this y… Yes               Based o…              5
 8           1 Had flu vaccine this y… Yes               I am co…              2
 9           1 Had flu vaccine this y… Yes               I am co…              2
10           1 Had flu vaccine this y… Yes               Getting…              1
# ℹ 38,882 more rows
covid_survey_longer <- covid_survey_longer %>%
  mutate(
    max = 5
    )

covid_survey_all <- covid_survey_longer %>%
  group_by(response) %>%
  summarise(
    total = sum(as.numeric(response_value), na.rm = TRUE)
  )
  covid_survey_all <- covid_survey_all %>%
    mutate(total_sum = sum(total, na.rm = TRUE))

covid_survey_all <- covid_survey_all %>%
  mutate(pct = total / total_sum) %>%
  select(-total, -total_sum)
print(covid_survey_all, n = Inf)
# A tibble: 6 × 2
  response                                                                   pct
  <chr>                                                                    <dbl>
1 Based on my understanding, I believe the vaccine is safe                 0.192
2 Getting the vaccine will make me feel safer at work                      0.126
3 I am concerned about the safety and side effects of the vaccine          0.306
4 I am confident in the scientific vetting process for the new COVID vacc… 0.134
5 I trust the information that I have received about the vaccines          0.130
6 I will recommend the vaccine to family, friends, and community members   0.113
ggplot(covid_survey_all, aes(x = response, y = pct)) +
  geom_col() +
  coord_flip() +
  labs(x = "", y = "Proportion") +
  scale_y_continuous(labels = scales::percent) +
  theme_minimal() +
  scale_x_discrete(labels = scales::label_wrap(25)) + 
  theme(
    axis.text.y = element_text(size = 14)
  )

ggplot(covid_survey_all, aes(y = pct, fill = response, x = "")) +
  geom_bar(position="stack", stat="identity")